{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text"
   },
   "source": [
    "This is a companion notebook for the book [Deep Learning with Python, Third Edition](https://www.manning.com/books/deep-learning-with-python-third-edition). For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.\n\n**If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.**\n\nThe book's contents are available online at [deeplearningwithpython.io](https://deeplearningwithpython.io)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "!pip install keras keras-hub --upgrade -q"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "import os\n",
    "os.environ[\"KERAS_BACKEND\"] = \"jax\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "cellView": "form",
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "# @title\n",
    "import os\n",
    "from IPython.core.magic import register_cell_magic\n",
    "\n",
    "@register_cell_magic\n",
    "def backend(line, cell):\n",
    "    current, required = os.environ.get(\"KERAS_BACKEND\", \"\"), line.split()[-1]\n",
    "    if current == required:\n",
    "        get_ipython().run_cell(cell)\n",
    "    else:\n",
    "        print(\n",
    "            f\"This cell requires the {required} backend. To run it, change KERAS_BACKEND to \"\n",
    "            f\"\\\"{required}\\\" at the top of the notebook, restart the runtime, and rerun the notebook.\"\n",
    "        )"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text"
   },
   "source": [
    "## Object detection"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text"
   },
   "source": [
    "### Single-stage vs. two-stage object detectors"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text"
   },
   "source": [
    "#### Two-stage R-CNN detectors"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text"
   },
   "source": [
    "#### Single-stage detectors"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text"
   },
   "source": [
    "### Training a YOLO model from scratch"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text"
   },
   "source": [
    "#### Downloading the COCO dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "import keras\n",
    "import keras_hub\n",
    "\n",
    "images_path = keras.utils.get_file(\n",
    "    \"coco\",\n",
    "    \"http://images.cocodataset.org/zips/train2017.zip\",\n",
    "    extract=True,\n",
    ")\n",
    "annotations_path = keras.utils.get_file(\n",
    "    \"annotations\",\n",
    "    \"http://images.cocodataset.org/annotations/annotations_trainval2017.zip\",\n",
    "    extract=True,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "import json\n",
    "\n",
    "with open(f\"{annotations_path}/annotations/instances_train2017.json\", \"r\") as f:\n",
    "    annotations = json.load(f)\n",
    "\n",
    "images = {image[\"id\"]: image for image in annotations[\"images\"]}\n",
    "\n",
    "def scale_box(box, width, height):\n",
    "    scale = 1.0 / max(width, height)\n",
    "    x, y, w, h = [v * scale for v in box]\n",
    "    x += (height - width) * scale / 2 if height > width else 0\n",
    "    y += (width - height) * scale / 2 if width > height else 0\n",
    "    return [x, y, w, h]\n",
    "\n",
    "metadata = {}\n",
    "for annotation in annotations[\"annotations\"]:\n",
    "    id = annotation[\"image_id\"]\n",
    "    if id not in metadata:\n",
    "        metadata[id] = {\"boxes\": [], \"labels\": []}\n",
    "    image = images[id]\n",
    "    box = scale_box(annotation[\"bbox\"], image[\"width\"], image[\"height\"])\n",
    "    metadata[id][\"boxes\"].append(box)\n",
    "    metadata[id][\"labels\"].append(annotation[\"category_id\"])\n",
    "    metadata[id][\"path\"] = images_path + \"/train2017/\" + image[\"file_name\"]\n",
    "metadata = list(metadata.values())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "len(metadata)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "min([len(x[\"boxes\"]) for x in metadata])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "max([len(x[\"boxes\"]) for x in metadata])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "max(max(x[\"labels\"]) for x in metadata) + 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "metadata[435]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "[keras_hub.utils.coco_id_to_name(x) for x in metadata[435][\"labels\"]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "from matplotlib.colors import hsv_to_rgb\n",
    "from matplotlib.patches import Rectangle\n",
    "\n",
    "color_map = {0: \"gray\"}\n",
    "\n",
    "def label_to_color(label):\n",
    "    if label not in color_map:\n",
    "        h, s, v = (len(color_map) * 0.618) % 1, 0.5, 0.9\n",
    "        color_map[label] = hsv_to_rgb((h, s, v))\n",
    "    return color_map[label]\n",
    "\n",
    "def draw_box(ax, box, text, color):\n",
    "    x, y, w, h = box\n",
    "    ax.add_patch(Rectangle((x, y), w, h, lw=2, ec=color, fc=\"none\"))\n",
    "    textbox = dict(fc=color, pad=1, ec=\"none\")\n",
    "    ax.text(x, y, text, c=\"white\", size=10, va=\"bottom\", bbox=textbox)\n",
    "\n",
    "def draw_image(ax, image):\n",
    "    ax.set(xlim=(0, 1), ylim=(1, 0), xticks=[], yticks=[], aspect=\"equal\")\n",
    "    image = plt.imread(image)\n",
    "    height, width = image.shape[:2]\n",
    "    hpad = (1 - height / width) / 2 if width > height else 0\n",
    "    wpad = (1 - width / height) / 2 if height > width else 0\n",
    "    extent = [wpad, 1 - wpad, 1 - hpad, hpad]\n",
    "    ax.imshow(image, extent=extent)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "sample = metadata[435]\n",
    "ig, ax = plt.subplots(dpi=300)\n",
    "draw_image(ax, sample[\"path\"])\n",
    "for box, label in zip(sample[\"boxes\"], sample[\"labels\"]):\n",
    "    label_name = keras_hub.utils.coco_id_to_name(label)\n",
    "    draw_box(ax, box, label_name, label_to_color(label))\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "import random\n",
    "\n",
    "metadata = list(filter(lambda x: len(x[\"boxes\"]) <= 4, metadata))\n",
    "random.shuffle(metadata)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text"
   },
   "source": [
    "#### Creating a YOLO model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "image_size = 448\n",
    "\n",
    "backbone = keras_hub.models.Backbone.from_preset(\n",
    "    \"resnet_50_imagenet\",\n",
    ")\n",
    "preprocessor = keras_hub.layers.ImageConverter.from_preset(\n",
    "    \"resnet_50_imagenet\",\n",
    "    image_size=(image_size, image_size),\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "from keras import layers\n",
    "\n",
    "grid_size = 6\n",
    "num_labels = 91\n",
    "\n",
    "inputs = keras.Input(shape=(image_size, image_size, 3))\n",
    "x = backbone(inputs)\n",
    "x = layers.Conv2D(512, (3, 3), strides=(2, 2))(x)\n",
    "x = keras.layers.Flatten()(x)\n",
    "x = layers.Dense(2048, activation=\"relu\", kernel_initializer=\"glorot_normal\")(x)\n",
    "x = layers.Dropout(0.5)(x)\n",
    "x = layers.Dense(grid_size * grid_size * (num_labels + 5))(x)\n",
    "x = layers.Reshape((grid_size, grid_size, num_labels + 5))(x)\n",
    "box_predictions = x[..., :5]\n",
    "class_predictions = layers.Activation(\"softmax\")(x[..., 5:])\n",
    "outputs = {\"box\": box_predictions, \"class\": class_predictions}\n",
    "model = keras.Model(inputs, outputs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "model.summary()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text"
   },
   "source": [
    "#### Readying the COCO data for the YOLO model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "def to_grid(box):\n",
    "    x, y, w, h = box\n",
    "    cx, cy = (x + w / 2) * grid_size, (y + h / 2) * grid_size\n",
    "    ix, iy = int(cx), int(cy)\n",
    "    return (ix, iy), (cx - ix, cy - iy, w, h)\n",
    "\n",
    "def from_grid(loc, box):\n",
    "    (xi, yi), (x, y, w, h) = loc, box\n",
    "    x = (xi + x) / grid_size - w / 2\n",
    "    y = (yi + y) / grid_size - h / 2\n",
    "    return (x, y, w, h)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import math\n",
    "\n",
    "class_array = np.zeros((len(metadata), grid_size, grid_size))\n",
    "box_array = np.zeros((len(metadata), grid_size, grid_size, 5))\n",
    "\n",
    "for index, sample in enumerate(metadata):\n",
    "    boxes, labels = sample[\"boxes\"], sample[\"labels\"]\n",
    "    for box, label in zip(boxes, labels):\n",
    "        (x, y, w, h) = box\n",
    "        left, right = math.floor(x * grid_size), math.ceil((x + w) * grid_size)\n",
    "        bottom, top = math.floor(y * grid_size), math.ceil((y + h) * grid_size)\n",
    "        class_array[index, bottom:top, left:right] = label\n",
    "\n",
    "for index, sample in enumerate(metadata):\n",
    "    boxes, labels = sample[\"boxes\"], sample[\"labels\"]\n",
    "    for box, label in zip(boxes, labels):\n",
    "        (xi, yi), (grid_box) = to_grid(box)\n",
    "        box_array[index, yi, xi] = [*grid_box, 1.0]\n",
    "        class_array[index, yi, xi] = label"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "def draw_prediction(image, boxes, classes, cutoff=None):\n",
    "    fig, ax = plt.subplots(dpi=300)\n",
    "    draw_image(ax, image)\n",
    "    for yi, row in enumerate(classes):\n",
    "        for xi, label in enumerate(row):\n",
    "            color = label_to_color(label) if label else \"none\"\n",
    "            x, y, w, h = (v / grid_size for v in (xi, yi, 1.0, 1.0))\n",
    "            r = Rectangle((x, y), w, h, lw=2, ec=\"black\", fc=color, alpha=0.5)\n",
    "            ax.add_patch(r)\n",
    "    for yi, row in enumerate(boxes):\n",
    "        for xi, box in enumerate(row):\n",
    "            box, confidence = box[:4], box[4]\n",
    "            if not cutoff or confidence >= cutoff:\n",
    "                box = from_grid((xi, yi), box)\n",
    "                label = classes[yi, xi]\n",
    "                color = label_to_color(label)\n",
    "                name = keras_hub.utils.coco_id_to_name(label)\n",
    "                draw_box(ax, box, f\"{name} {max(confidence, 0):.2f}\", color)\n",
    "    plt.show()\n",
    "\n",
    "draw_prediction(metadata[0][\"path\"], box_array[0], class_array[0], cutoff=1.0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "import tensorflow as tf\n",
    "\n",
    "def load_image(path):\n",
    "    x = tf.io.read_file(path)\n",
    "    x = tf.image.decode_jpeg(x, channels=3)\n",
    "    return preprocessor(x)\n",
    "\n",
    "images = tf.data.Dataset.from_tensor_slices([x[\"path\"] for x in metadata])\n",
    "images = images.map(load_image, num_parallel_calls=8)\n",
    "labels = {\"box\": box_array, \"class\": class_array}\n",
    "labels = tf.data.Dataset.from_tensor_slices(labels)\n",
    "\n",
    "dataset = tf.data.Dataset.zip(images, labels).batch(16).prefetch(2)\n",
    "val_dataset, train_dataset = dataset.take(500), dataset.skip(500)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text"
   },
   "source": [
    "#### Training the YOLO model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "from keras import ops\n",
    "\n",
    "def unpack(box):\n",
    "    return box[..., 0], box[..., 1], box[..., 2], box[..., 3]\n",
    "\n",
    "def intersection(box1, box2):\n",
    "    cx1, cy1, w1, h1 = unpack(box1)\n",
    "    cx2, cy2, w2, h2 = unpack(box2)\n",
    "    left = ops.maximum(cx1 - w1 / 2, cx2 - w2 / 2)\n",
    "    bottom = ops.maximum(cy1 - h1 / 2, cy2 - h2 / 2)\n",
    "    right = ops.minimum(cx1 + w1 / 2, cx2 + w2 / 2)\n",
    "    top = ops.minimum(cy1 + h1 / 2, cy2 + h2 / 2)\n",
    "    return ops.maximum(0.0, right - left) * ops.maximum(0.0, top - bottom)\n",
    "\n",
    "def intersection_over_union(box1, box2):\n",
    "    cx1, cy1, w1, h1 = unpack(box1)\n",
    "    cx2, cy2, w2, h2 = unpack(box2)\n",
    "    intersection_area = intersection(box1, box2)\n",
    "    a1 = ops.maximum(w1, 0.0) * ops.maximum(h1, 0.0)\n",
    "    a2 = ops.maximum(w2, 0.0) * ops.maximum(h2, 0.0)\n",
    "    union_area = a1 + a2 - intersection_area\n",
    "    return ops.divide_no_nan(intersection_area, union_area)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "def signed_sqrt(x):\n",
    "    return ops.sign(x) * ops.sqrt(ops.absolute(x) + keras.config.epsilon())\n",
    "\n",
    "def box_loss(true, pred):\n",
    "    xy_true, wh_true, conf_true = true[..., :2], true[..., 2:4], true[..., 4:]\n",
    "    xy_pred, wh_pred, conf_pred = pred[..., :2], pred[..., 2:4], pred[..., 4:]\n",
    "    no_object = conf_true == 0.0\n",
    "    xy_error = ops.square(xy_true - xy_pred)\n",
    "    wh_error = ops.square(signed_sqrt(wh_true) - signed_sqrt(wh_pred))\n",
    "    iou = intersection_over_union(true, pred)\n",
    "    conf_target = ops.where(no_object, 0.0, ops.expand_dims(iou, -1))\n",
    "    conf_error = ops.square(conf_target - conf_pred)\n",
    "    error = ops.concatenate(\n",
    "        (\n",
    "            ops.where(no_object, 0.0, xy_error * 5.0),\n",
    "            ops.where(no_object, 0.0, wh_error * 5.0),\n",
    "            ops.where(no_object, conf_error * 0.5, conf_error),\n",
    "        ),\n",
    "        axis=-1,\n",
    "    )\n",
    "    return ops.sum(error, axis=(1, 2, 3))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "model.compile(\n",
    "    optimizer=keras.optimizers.Adam(2e-4),\n",
    "    loss={\"box\": box_loss, \"class\": \"sparse_categorical_crossentropy\"},\n",
    ")\n",
    "model.fit(\n",
    "    train_dataset,\n",
    "    validation_data=val_dataset,\n",
    "    epochs=4,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "x, y = next(iter(val_dataset.rebatch(1)))\n",
    "preds = model.predict(x)\n",
    "boxes = preds[\"box\"][0]\n",
    "classes = np.argmax(preds[\"class\"][0], axis=-1)\n",
    "path = metadata[0][\"path\"]\n",
    "draw_prediction(path, boxes, classes, cutoff=0.1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "draw_prediction(path, boxes, classes, cutoff=None)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text"
   },
   "source": [
    "### Using a pretrained RetinaNet detector"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "url = \"https://s3.us-east-1.amazonaws.com/book.keras.io/3e/seurat.jpg\"\n",
    "path = keras.utils.get_file(origin=url)\n",
    "image = np.array([keras.utils.load_img(path)])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "detector = keras_hub.models.ObjectDetector.from_preset(\n",
    "    \"retinanet_resnet50_fpn_v2_coco\",\n",
    "    bounding_box_format=\"rel_xywh\",\n",
    ")\n",
    "predictions = detector.predict(image)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "[(k, v.shape) for k, v in predictions.items()]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "predictions[\"boxes\"][0][0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "fig, ax = plt.subplots(dpi=300)\n",
    "draw_image(ax, path)\n",
    "num_detections = predictions[\"num_detections\"][0]\n",
    "for i in range(num_detections):\n",
    "    box = predictions[\"boxes\"][0][i]\n",
    "    label = predictions[\"labels\"][0][i]\n",
    "    label_name = keras_hub.utils.coco_id_to_name(label)\n",
    "    draw_box(ax, box, label_name, label_to_color(label))\n",
    "plt.show()"
   ]
  }
 ],
 "metadata": {
  "accelerator": "GPU",
  "colab": {
   "collapsed_sections": [],
   "name": "chapter12_object-detection",
   "private_outputs": false,
   "provenance": [],
   "toc_visible": true
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}